# -*- coding: utf-8 -*-
import json
import math

import scrapy
from scrapy.exceptions import IgnoreRequest

from zc_core.dao.batch_dao import BatchDao
from zc_core.dao.catalog_dao import CatalogDao
from zc_core.model.items import Box
from zc_core.util.http_util import retry_request

from energyahead.utils.login import SeleniumLogin
from energyahead.rules import *
from zc_core.spiders.base import BaseSpider


class FullSpider(BaseSpider):
    """Full crawl: walk every level-1 catalog of the current batch and page
    through the third-party commodity listing for each publish status.
    """
    name = 'full'
    # Override project-wide settings: throttle to one request every 2 seconds.
    custom_settings = {
        'DOWNLOAD_DELAY': 2
    }
    # Gateway endpoint that multiplexes all backend REST services.
    routing_url = 'https://eportal.energyahead.com/rest/service/routing'

    def __init__(self, batchNo=None, *args, **kwargs):
        super(FullSpider, self).__init__(batchNo=batchNo, *args, **kwargs)
        # Register this crawl batch before any request is issued.
        BatchDao().create_batch(self.batch_no)
        self.page_size = 500
        # The backend caps a query at 10000 rows, so at most this many pages
        # are reachable per (catalog, status) query.
        # NOTE(review): page_limit is never applied when fanning out pages in
        # parse_sku_page -- confirm whether parse_back_sku_page already clamps
        # total_page to this bound.
        self.page_limit = math.floor(10000 / self.page_size)

    def _build_page_request(self, code, page, third_publish_status, callback):
        """Build the POST request for one listing page.

        :param code: catalogCode of the level-1 catalog being listed
        :param page: 1-based page number to fetch
        :param third_publish_status: publish-status filter value (1 or 2)
        :param callback: scrapy callback that will parse the response
        :return: a scrapy.FormRequest ready to be scheduled
        """
        req_bo = {
            'pageNo': page,
            'pageSize': self.page_size,
            'goodsCode': code,
            'commoditySource': '',
            'thirdPublishStatus': third_publish_status,
            'publishStatus': '',
            'commodityName': '',
            'commodityCode': '',
            'itemId': '',
            'updateLoginId': '',
            'updateTimeMin': '',
            'updateTimeMax': '',
            'thirdCommodityCode': ''
        }
        return scrapy.FormRequest(
            method='POST',
            url=self.routing_url,
            formdata={
                'service': 'queryThirdPartyCommodityPageListForPortalPublishQuery',
                'token': self.token,
                # The gateway expects the query object serialized as JSON.
                'ThirdPartyCommodityReqBO': json.dumps(req_bo)
            },
            meta={
                'batchNo': self.batch_no,
                'catalogCode': code,
                'page': page,
                'thirdPublishStatus': third_publish_status,
            },
            cookies=self.cookies,
            callback=callback,
            errback=self.error_back,
            priority=10,
            dont_filter=True
        )

    def start_requests(self):
        """Log in via Selenium, then issue the page-1 request for every
        level-1 catalog of the batch, once per publish status."""
        login = SeleniumLogin()
        cookie_info = login.get_cookies()
        self.cookies = cookie_info.get('cookies', {})
        self.account = cookie_info.get('account', '')
        if not self.cookies or not self.account:
            self.logger.error('init cookie failed...')
            return
        self.token = self.account.get('token', '')
        if not self.token:
            self.logger.error('init token failed...')
            return

        self.logger.info('init cookie: %s', self.cookies)
        self.logger.info('init token: %s', self.token)

        catalog_list = CatalogDao().get_batch_cat_list(self.batch_no, fields={'_id': 1, 'catalogCode': 1},
                                                       query={'level': 1})
        if catalog_list:
            for cat in catalog_list:
                code = cat.get('catalogCode')
                for third_publish_status in [1, 2]:
                    # Page 1 goes through parse_sku_page, which also fans out
                    # the remaining pages once the total is known.
                    yield self._build_page_request(code, 1, third_publish_status, self.parse_sku_page)

    def _emit_page_boxes(self, response):
        """Parse one listing page and yield its sku/item Boxes (maybe none)."""
        meta = response.meta
        cur_page = meta.get('page')
        code = meta.get('catalogCode')
        third_publish_status = meta.get('thirdPublishStatus')

        # parse_back_sku here resolves to the module-level parser imported
        # from energyahead.rules, not this spider's method of the same name.
        sku_list, item_list = parse_back_sku(response)
        if sku_list and item_list:
            self.logger.info(
                '清单: status=%s, cat=%s, page=%s, cnt=%s' % (third_publish_status, code, cur_page, len(sku_list)))
            yield Box('sku', self.batch_no, sku_list)
            yield Box('item', self.batch_no, item_list)
        else:
            self.logger.info('分页为空: status=%s, cat=%s, page=%s' % (third_publish_status, code, cur_page))

    def parse_sku_page(self, response):
        """First-page callback: emit this page's items, then request pages
        2..total_page of the same (catalog, status) query."""
        for box in self._emit_page_boxes(response):
            yield box

        code = response.meta.get('catalogCode')
        third_publish_status = response.meta.get('thirdPublishStatus')
        total_page = parse_back_sku_page(response)
        if total_page:
            self.logger.info('页数: status=%s, cat=%s, total=%s' % (third_publish_status, code, total_page))
            for page in range(2, total_page + 1):
                # Later pages only need item extraction, not pagination.
                yield self._build_page_request(code, page, third_publish_status, self.parse_back_sku)

    def parse_back_sku(self, response):
        """Follow-up page callback: emit this page's items only."""
        for box in self._emit_page_boxes(response):
            yield box
